# Author: Gregory Smith
# Date: 12 - 5 - 2018
# Title: Bootstrapped AME Code 
# Job: Includes the Code For Calculating the AMEs included in the paper
#   and for calcuating Welch's T Tests 

# Original Code relied upon # packageVersion("margins") # ‘0.3.23’


#---- Packages 

list.of.packages <- c("tidyverse", "glue", "kableExtra", "margins")

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

library(tidyverse)
library(kableExtra)
library(margins)

setwd("~/Dropbox/covert_ops_paper/2019-smith-io-secret-but-constrained-replication-files/")

#---- Data 

dta <- read_csv("data/smith_2019_secret_but_constrained_dta.csv")

#----- Functions 

#---- Average Marginal Effects (AME) Plot

# Tidy_ame calculates AMEs and turns the restults into a tidy dataframe  
# The default number of draws is 1000 

tidy_boot_ame <- function(x, n_iterations = 1000) {
  
  library(tibble)
  library(margins)
  library(janitor)
  
  set.seed(671)
  
  clean_names(as.tibble(summary(margins(x, vce = "bootstrap",
                                        iterations = n_iterations))))  
  
}

#---- SE to SD Conversion Function 

se_to_sd_calculation <- function(se, n = 1000) {
  
  se*(sqrt(n))
  
}


#----- Welch's T-Test Function 

# Note on Arguments:
# m1, m2: The sample means
# sd1, sd2: The sample standard deviations
# n1, n2: The sample sizes
# m0: The null value for the difference in means to be tested for. 
#      The default values is 0. 
# name1, name2: The names of the two samples 

welch_t_test <- function(m1, m2, sd1, sd2, n1, n2, m0 = 0, name1, name2) {
  
  library(glue)
  
  se <- sqrt( (sd1^2/n1) + (sd2^2/n2) )
  df <- ( (sd1^2/n1 + sd2^2/n2)^2 )/( (sd1^2/n1)^2/(n1-1) + (sd2^2/n2)^2/(n2-1) )
  
  t <- (m1 - m2 - m0)/se 
  test_values <- data.frame(test_values = c(m1 - m2, se, t, 2*pt(-abs(t), df)))    
  test_stats <- data.frame(test_stat = c("difference_of_means", "std_error",
                                         "t", "p_value"))
  
  dta <- cbind(test_stats, test_values)
  
  dta <- dta %>% mutate(test_name = glue("{name1} - {name2}"))
  
  return(dta) 
  
}


#----- Models 

divided_orke_base <- glm(orourke_onset ~  divided + 
                           factor(president) - 1 +
                           orourke_peace_years + orourke_peace_years_2  +
                           orourke_peace_years_3,
                         data = dta, family = "binomial")


divided_orke_full <- glm(orourke_onset ~  divided + 
                           election  + approval_q1 +
                           democracy + cinc_proportion + clean_russ_inf + 
                           oversight_change + 
                           orourke_peace_years + orourke_peace_years_2 +
                           orourke_peace_years_3,
                         data = dta, family = "binomial")


divided_cmb_base <- glm(us_covert_onset ~  divided + 
                          factor(president) - 1 +
                          us_peace_years + us_peace_years_2 + us_peace_years_3, 
                        data = dta, family = "binomial")

divided_cmb_full <- glm(us_covert_onset ~  divided + 
                          election  + approval_q1 + democracy + cinc_proportion +
                          clean_russ_inf + oversight_change +
                          us_peace_years + us_peace_years_2 + us_peace_years_3, 
                        data = dta, family = "binomial")


minor_us_base_mid_div <- glm(minor_us_onset ~  divided + 
                               factor(president) - 1 + 
                               minor_us_py + I(minor_us_py^2)  +
                               I(minor_us_py^3), 
                             data = dta, family = "binomial")

minor_us_mid_div <- glm(minor_us_onset ~  divided + 
                          election  + approval_q1 + 
                          democracy + cinc_proportion + clean_russ_inf + 
                          oversight_change + 
                          factor(president) - 1 + 
                          minor_us_py + I(minor_us_py^2)  +
                          I(minor_us_py^3), 
                        data = dta, family = "binomial")

major_us_base_mid_div <- glm(major_us_onset ~  divided + 
                               factor(president) - 1 + 
                               major_us_py + I(major_us_py^2)  +
                               I(major_us_py^3), 
                             data = dta, family = "binomial")


major_us_mid_div <- glm(major_us_onset ~ divided + 
                          election  + approval_q1 +
                          democracy + cinc_proportion + clean_russ_inf + 
                          factor(president) - 1 + 
                          major_us_py + I(major_us_py^2)  +
                          I(major_us_py^3), 
                        data = dta, family = "binomial")


#---- Calculates the Bootstrapped Marginal Effects 

orke_boot_me <- tidy_boot_ame(divided_orke_base) %>%
  mutate(model = "O'Rourke Onset Base")

orke_boot_full_me <- tidy_boot_ame(divided_orke_full) %>%
  mutate(model = "O'Rourke Onset Full")

pooled_boot_me <- tidy_boot_ame(divided_cmb_base) %>%
  mutate(model = "Pooled Onset Base")

pooled_boot_full_me <- tidy_boot_ame(divided_cmb_full) %>%
  mutate(model = "Pooled Onset Full")

minor_mid_boot_me <- tidy_boot_ame(minor_us_base_mid_div) %>%
  mutate(model = "Minor MID Base")

minor_mid_full_boot_me <- tidy_boot_ame(minor_us_mid_div) %>%
  mutate(model = "Minor MID Full")

major_mid_boot_me <- tidy_boot_ame(major_us_base_mid_div) %>%
  mutate(model = "Major MID Base")

major_mid_full_boot_me <- tidy_boot_ame(major_us_mid_div) %>%
  mutate(model = "Major MID Full ")


#---- Combines the tidy output into the same data frame 

ame_dta <- bind_rows(orke_boot_me, orke_boot_full_me, 
                     pooled_boot_me, pooled_boot_full_me,
                     minor_mid_boot_me, minor_mid_full_boot_me,
                     major_mid_boot_me, major_mid_full_boot_me) %>% 
  filter(factor == "divided") 

ame_table_dta <- ame_dta %>% 
  dplyr::mutate(factor = "Divided Government") %>% 
  dplyr::select(model, ame:upper, -z) %>% 
  mutate_if(is.numeric, round, digits = 4) %>%  
  dplyr::rename(AME = ame, SE = se, Lower = lower, Upper = upper, Model = model,
                `P Value` = p)


#---- Creates a Latex Table from the Tidy AME Data frame for all of the AMEs
ame_table_dta %>% 
  mutate(model_id = if_else(str_detect(Model, "Base") == TRUE, 1, 0)) %>% 
  arrange(desc(model_id), desc(Model)) %>%
  dplyr::select(-model_id) %>% 
  kable("latex", booktabs = T,
        caption = "The Average Marginal Effects of Divided Government",
        linesep = c("", "", "", "\\hline")) %>% # adds linesep every four lines
  kable_styling(latex_options = c("striped", "hold_position", "scale_down"))

#---- Creates an AME Plot for all of the AMEs 
# Highlight the entire code or else case_when won't run 

ame_dta <- ame_dta %>%
  mutate(significant = if_else(p <= 0.05, 1, 0),
         plot_id = case_when(
             str_detect(model, "Full") == TRUE  ~ "Full Models",
             str_detect(model, "Base") == TRUE  ~ "Base Models",
             TRUE                                ~ "interact")) %>% 
  separate(model, c("dv", "b", "c"), sep = " ", remove = FALSE) %>% 
  dplyr::select(-b, -c) 

           
ame_plot <- ggplot(ame_dta, aes(x = ame, y = dv, xmin = lower,
                                color = significant, xmax = upper, height = 0)) + 
  geom_point() + geom_vline(xintercept = 0) + geom_errorbarh() + 
  facet_wrap(~plot_id) + theme_bw() + #  nrow = 2
  scale_colour_gradient(low = "gray50", high = "gray1", guide = "legend") +
  theme(legend.position = "none") +
  ylab("Dependent Variable") + xlab("AME of Divided Government") 

#ggsave("~/Desktop/divided_ame_boot_plot.pdf", plot = ame_plot,
#       height = 2.5, width = 6, units = "in")


#------- Welch's T-Test (Difference Between AME Samples)

ork_base_sd <- se_to_sd_calculation(0.00508)
ork_full_sd <- se_to_sd_calculation(0.00361)

pool_base_sd <- se_to_sd_calculation(0.00771)
pool_full_sd <- se_to_sd_calculation(0.00549)

minor_base_sd <- se_to_sd_calculation(0.00626)
minor_full_sd <- se_to_sd_calculation(0.00557)

major_base_sd <- se_to_sd_calculation(0.04568)
major_full_sd <- se_to_sd_calculation(0.00557)


#---  Major MIDs and O'Rourke (base)

wt_1 <- welch_t_test(m1 = -0.00634, m2 = -0.0118,
                     sd1 = major_base_sd, sd2 = ork_base_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Major MID Base", name2 = "O'Rourke Base")


#----  Minor MIDsand O'Rourke (base)

wt_2 <- welch_t_test(m1 = -0.00837, m2 = -0.0118,
                     sd1 = minor_base_sd, sd2 = ork_base_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Minor MID Base", name2 = "O'Rourke Base")

#--- Major MIDs and Pooled Covert Ops (base)

wt_3 <- welch_t_test(m1 = -0.00634, m2 = -0.0265,
                     sd1 = major_base_sd, sd2 = pool_full_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Major MID Base", name2 = "Pooled Base")

#---- Minor MIDs and Pooled Covert Opse   (base)

wt_4 <- welch_t_test(m1 = -0.00837, m2 = -0.0265,
                     sd1 = minor_base_sd, sd2 = pool_full_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Minor MID Base", name2 = "Pooled Base")

#--- Major MIDs and O'Rourke (full)

wt_5 <- welch_t_test(m1 = -0.0073, m2 = -0.0083,
                     sd1 = major_full_sd, sd2 = ork_full_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Major MID Full", name2 = "O'Rourke Full")


#----  Minor MIDs and O'Rourke (full)

wt_6 <- welch_t_test(m1 = -0.0053, m2 = -0.0083,
                     sd1 = minor_full_sd, sd2 = ork_full_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Minor MID Full", name2 = "O'Rourke Full")

#--- Major MIDs and Pooled Covert Ops (full)

wt_7 <- welch_t_test(m1 = -0.0073, m2 = -0.0092,
                     sd1 = major_full_sd, sd2 = pool_base_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Major MID Full", name2 = "Pooled Full")

#---- Minor MIDs and Pooled Covert Opse (full)

wt_8 <- welch_t_test(m1 = -0.0053, m2 = -0.0092,
                     sd1 = minor_full_sd, sd2 = pool_base_sd,
                     n1 = 1000, n2 = 1000,
                     name1 = "Minor MID Full", name2 = "Pooled Full")


#----- Combines Welch T Test Results Into a Single Dataframe 

welch_dta <- bind_rows(wt_1, wt_2, wt_3, wt_4, wt_5, wt_6, wt_7, wt_8) %>% 
  spread(test_stat, test_values) %>% 
  mutate_if(is.numeric, round, digits = 4) %>% 
  dplyr::select(test_name, difference_of_means, std_error, p_value) %>% 
  mutate(model_id = if_else(str_detect(test_name, "Base") == TRUE, 1, 0)) %>% 
  rename(Test = test_name, `Diff. In Means` = difference_of_means,
         P = p_value, SE = std_error)


#---- Presents the Results in a Latex Table 

welch_dta %>% 
  mutate(model_id = if_else(str_detect(Test, "Base") == TRUE, 1, 0),
         dv_id = if_else(str_detect(Test, "Minor") == TRUE, 0, 1),
         significant = if_else(P < 0.05, 1, 0)) %>% 
  arrange(desc(model_id), dv_id) %>% 
  dplyr::select(-model_id, -dv_id) %>% 
  kable("latex", booktabs = T,
        caption = "Results of Welch's T Tests",
        linesep = c("", "", "", "\\hline")) %>% # adds linesep every four lines
  kable_styling(latex_options = c("striped", "hold_position", "scale_down"))



